
*====================== Set Up =================================*

global path "/path/to/your/directory"

set more off
set scheme sonia_new


do $path/code/analysis/guren_programs.do

capture program drop make_graph
program define make_graph
	sum `2' if inactive == 0 & ivreg_samp == 1	
	local `2'_mean = r(mean)
	sum z_exp if inactive == 0 & ivreg_samp == 1	
	local z_mean = r(mean)
	reghdfe `2' i.z_bins if inactive == 0 & ivreg_samp == 1, absorb(zip_dw##i.purchase_year##i.list_orig_year) nocons
	mat `2' = e(b)
	mat `2' = `2''
	reghdfe `2'  c.z_exp##i.inactive $controls  if ivreg_samp == 1, absorb(zip_dw##i.purchase_year##i.list_orig_year)
	local slope_inactive = _b[z_exp] + _b[1.inactive#c.z_exp]
	local slope_active   = _b[z_exp]
	local int_inactive   = _b[_cons] + _b[1.inactive]
	local int_active     = _b[_cons]
	
	preserve
	collapse z_exp if inactive == 0 & ivreg_samp == 1, by(z_bins)
	tempfile z_exp
	save `z_exp'
	restore
	reghdfe `2' i.z_bins#i.inactive i.z_bins $controls, absorb(zip_dw##i.purchase_year##i.list_orig_year)
	preserve
	regsave
	keep if regexm(var, "z_bins") | regexm(var, "inactive") | regexm(var, "cons")
	drop if regexm(var, "o\.") |  regexm(var, "0b\.")

	split var, parse(".") gen(vars_)
	split vars_2, parse("#") gen(inactive_)
	replace vars_1 = "1" if vars_1 == "1b"
	keep if vars_3 != ""

	destring vars_1, replace force

	egen avg_effect = mean(coef)
	gen coef_demean = coef - avg_effect

	rename vars_1 z_bins
	merge m:1 z_bins using `z_exp'
	svmat `2'
	egen avg_`2' = mean(`2'1)
	gen `2'_demean = `2'1 - avg_`2'
	
	gen log_exp = z_exp
	gen inactive_effect = coef_demean + `2'_demean + ``2'_mean'
	gen active_effect = `2'_demean + ``2'_mean'
	gen ub = inactive_effect + 1.96*stderr
	gen lb = inactive_effect - 1.96*stderr

	local cons_effect_active = (`2'_demean[6]+ ``2'_mean')
	*- `slope_active'*(z_exp)
	local cons_effect_inactive = (`2'_demean[6]+ ``2'_mean')
	*- `slope_inactive'*(z_exp)
	gen z_demean = z_exp - `z_mean'
	gen test = `slope_active'*z_demean + (``2'_mean')
	*cons_effect_inactive'
	gen test2 = `slope_inactive'*z_demean + (``2'_mean')
	label var log_exp "Log(Initial Buyer Agent's Experience + 1)"
	twoway (scatter active_effect log_exp, color(color1)) ///
	  (scatter inactive_effect log_exp, color(color2)) ///	  
	  (rcap ub lb log_exp, color(color2)) ///
	  (line test log_exp, lcolor(color1)) ///
	  (line test2 log_exp, lcolor(color2)), ///
	  legend(label(1 "Buyer Agent still active") ///
	  label(2 "Buyer Agent exited") order(1 2 7) ///
	  ring(0) position(11) bmargin(large) cols(1)) name(`1', replace)	  
	restore
end

capture program drop make_graph_nocontrols
program define make_graph_nocontrols
	sum `2' if inactive == 0 & ivreg_samp == 1	
	local `2'_mean = r(mean)
	sum z_exp if inactive == 0 & ivreg_samp == 1	
	local z_mean = r(mean)
	reghdfe `2' i.z_bins if inactive == 0 & ivreg_samp == 1, absorb(zip_dw##i.purchase_year##i.list_orig_year) nocons
	mat `2' = e(b)
	mat `2' = `2''
	reghdfe `2'  c.z_exp##i.inactive, absorb(zip_dw##i.purchase_year##i.list_orig_year)
	local slope_inactive = _b[z_exp] + _b[1.inactive#c.z_exp]
	local slope_active   = _b[z_exp]
	local int_inactive   = _b[_cons] + _b[1.inactive]
	local int_active     = _b[_cons]
	
	preserve
	collapse z_exp if inactive == 0 & ivreg_samp == 1, by(z_bins)
	tempfile z_exp
	save `z_exp'
	restore
	reghdfe `2' i.z_bins#i.inactive i.z_bins, absorb(zip_dw##i.purchase_year##i.list_orig_year)
	preserve
	regsave
	keep if regexm(var, "z_bins") | regexm(var, "inactive") | regexm(var, "cons")
	drop if regexm(var, "o\.") |  regexm(var, "0b\.")

	split var, parse(".") gen(vars_)
	split vars_2, parse("#") gen(inactive_)
	replace vars_1 = "1" if vars_1 == "1b"
	keep if vars_3 != ""

	destring vars_1, replace force

	egen avg_effect = mean(coef)
	gen coef_demean = coef - avg_effect

	rename vars_1 z_bins
	merge m:1 z_bins using `z_exp'
	svmat `2'
	egen avg_`2' = mean(`2'1)
	gen `2'_demean = `2'1 - avg_`2'
	
	gen log_exp = z_exp
	gen inactive_effect = coef_demean + `2'_demean + ``2'_mean'
	gen active_effect = `2'_demean + ``2'_mean'
	gen ub = inactive_effect + 1.96*stderr
	gen lb = inactive_effect - 1.96*stderr
	
	gen z_demean = z_exp - `z_mean'
	gen test = `slope_active'*z_demean + ``2'_mean'
	gen test2 = `slope_inactive'*z_demean + ``2'_mean'
	label var log_exp "Log(Initial Buyer Agent's Experience + 1)"
	twoway (scatter active_effect log_exp, color(color1)) ///
	  (scatter inactive_effect log_exp, color(color2)) ///	  
	  (rcap ub lb log_exp, color(color2)) ///
	  (line test log_exp, lcolor(color1)) ///
	  (line test2 log_exp, lcolor(color2)), ///
	  legend(label(1 "Buyer Agent still active") ///
	  label(2 "Buyer Agent exited") order(1 2 7) ///
	  ring(0) position(11) bmargin(large) cols(1)) name(`1', replace)	   ///
	  ylabel(2.55(0.01)2.6) 
	restore
end

use "$path/data/brokerpanel_main_vars.dta",clear
keep agent_id mls_id all_experience_l1
egen max_experience = max(all_experience_l1), by(agent_id mls_id)
drop all_experience_l1
duplicates drop
tempfile max_exp
save `max_exp'

/** Load data and prep for regressions */
use "$path/data/brokerpanel_main_vars.dta",clear
keep agent_id mls_id year active all_experience_l1 exit num_bs entrant
gen experience_at_exit = all_experience_l1 if exit == 1
sort agent_id mls_id year
by agent_id mls_id: replace experience_at_exit = experience_at_exit[_n-1] if experience_at_exit == .
gen exit_year = year if exit == 1
gen entry_year = year if entrant == 1
sort agent_id mls_id year
by agent_id mls_id: replace entry_year = entry_year[_n-1] if entry_year == . & entry_year[_n-1] != .
by agent_id mls_id: replace exit_year = exit_year[_n-1] if exit_year == . & exit_year[_n-1] != .
gsort agent_id mls_id -year
by agent_id mls_id: replace entry_year = entry_year[_n-1] if entry_year == . & entry_year[_n-1] != .
by agent_id mls_id: replace exit_year = exit_year[_n-1] if exit_year == . & exit_year[_n-1] != .
tempfile broker_panel

save `broker_panel'


use $path/data/full_w_deeds_prepped_new.dta, clear
gen sold = bagent_id != ""

gen list_orig_yq = qofd(list_orig_dt_cl)
sort prop_id_dw list_orig_dt_cl 

rename lagent_id agent_id
merge m:1 mls_id agent_id using `max_exp', nogen keep(1 3)
gen year =  list_orig_year
merge m:1 agent_id mls_id year using `broker_panel', keepusing(active)
keep if _merge == 1 | _merge == 3
drop _merge
rename (agent_id active) (lagent_id lagent_active)
rename all_experience_l1 seller_experience_l1
rename max_experience seller_max_experience

sort prop_id_dw list_orig_dt_cl 
by prop_id_dw: gen lagged_bagent_id = bagent_id[_n-1]
by prop_id_dw: gen lagged_bagent_office_id = bagent_office_id[_n-1]
rename lagged_bagent_id agent_id

merge m:1 mls_id agent_id using `max_exp', nogen keep(1 3)
merge m:1 agent_id mls_id year using `broker_panel', keepusing(active all_experience_l1 experience_at_exit exit_year entry_year )
rename (agent_id active all_experience_l1 experience_at_exit max_experience) (lagged_bagent_id lagged_bagent_active lagged_buyer_all_experience_l1 lagged_buyer_exp_at_exit lagged_buyer_max_experience)
keep if _merge == 1 | _merge == 3
drop _merge

sort prop_id_dw list_orig_dt_cl 
by prop_id_dw: gen same_buyer_seller = bagent_id[_n-1] == lagent_id[_n] if _n != 1 & sold[_n-1] == 1
by prop_id_dw: gen purchase_year = close_year[_n-1] if _n != 1 & sold[_n-1] == 1
by prop_id_dw: gen same_seller = lagent_id[_n-1] == bagent_id[_n] if _n != 1 & sold[_n-1] == 1
by prop_id_dw: gen time_diff =  list_orig_dt_cl[_n] - list_orig_dt_cl[_n-1]  if _n != 1 
by prop_id_dw: gen lagged_buyer_experience_purchase = lagged_buyer_all_experience_l1[_n-1] if _n != 1
gen repeat = lagged_bagent_id == lagent_id


gen time_diff_yr = time_diff/365
gen time_diff_qtr = time_diff/90

gen iv_sample = time_diff > 180 & time_diff !=.
local exp_cutoff .
gen endog_exp = log(seller_experience_l1 + 1) if seller_max_experience < 200
gen lagged_buyer_exp = lagged_buyer_all_experience_l1 if lagged_buyer_max_experience < 200
replace lagged_buyer_exp = lagged_buyer_exp_at_exit if lagged_bagent_active == 0 & lagged_buyer_max_experience < 200

gen endog_exp_all = log(seller_experience_l1 + 1) 
gen lagged_buyer_exp_all = lagged_buyer_all_experience_l1 
replace lagged_buyer_exp_all = lagged_buyer_exp_at_exit if lagged_bagent_active == 0

gen last_seen_years = 0
replace last_seen_years = list_orig_year - exit_year  if lagged_bagent_active == 0
gen last_seen_years_share = last_seen_years/(list_orig_year - purchase_year + 1) if last_seen_years >= 0

fastxtile z_bins = lagged_buyer_exp, n(10)

replace bust = period == 1
replace medium = period == 2
replace boom = period == 3
gen z_exp_all = log(lagged_buyer_exp_all + 1)
gen z_exp = log(lagged_buyer_exp + 1)
sum z_exp,d
gen z_exp_norm = z_exp - r(mean)
sum z_exp,d
gen low_exp = z_exp < 1.38 if z_exp != .
gen no_exp = lagged_buyer_exp == 0
gen time_diff_year = round(time_diff_yr)
gen time_diff_year2 = list_orig_year - purchase_year

gen inactive = lagged_bagent_active == 0
gen exit_year_adj = exit_year
replace exit_year_adj = 0 if inactive == 0

gen Z = z_exp*inactive
gen Z_all = z_exp_all*inactive
gen Z2 = z_bins * inactive

gen Z_p50 = low_exp * lagged_bagent_active 

gen Z_bust = Z*bust
gen Z_medium = Z*medium
gen Z_bust_all = Z_all*bust
gen Z_medium_all = Z_all*medium

gen Z2_bust = Z2*bust
gen Z2_medium = Z2*medium
gen Z_p50_bust = Z_p50*bust
gen Z_p50_medium = Z_p50*medium

gen endog_exp_bust = endog_exp * bust
gen endog_exp_medium = endog_exp * medium
gen endog_exp_bust_all = endog_exp_all * bust
gen endog_exp_medium_all = endog_exp_all * medium

gen z_exp_norm_bust = z_exp_norm*bust
gen z_exp_norm_medium = z_exp_norm*medium
gen lagged_bagent_active_bust = lagged_bagent_active *bust
gen lagged_bagent_active_medium = lagged_bagent_active *medium
gen z_bins_bust = z_bins * bust
gen z_bins_medium = z_bins * medium

rename  seller_experience_l1 all_experience_l1
rename  (lagent_id lagent_active) (agent_id active)
gen logexp_temp = .
label var logexp_temp "Log(Exp + 1)"


/*** Main Specification **/
estimates clear
gen log_close_price = log_close_price_cens

foreach x in  days_to_sale dom_ggp log_list_price_orig infer_hp_orig_tier_diff {
	sum `x', d
	replace `x' = . if `x' > r(p99)
}	
gen loginfer_hp_tier_cens = loginfer_hp_tier if infer_hp_orig_tier_diff_cens != .
gen loginfer_hp_sale_diff = log_close_price - loginfer_hp_tier_cens
gen discount = log_list_price_orig - log(close_price)
gen discount_cens = discount
sum discount_cens, d
replace discount_cens = . if discount< r(p1) | discount > r(p99)
replace discount = discount_cens
gen discount2 = discount if loginfer_hp_sale_diff != .
gen log_infer_hp_orig_tier_diff_cens = log(infer_hp_orig_tier_diff_cens)

replace logexp_temp = endog_exp
gen log_list = log_list_price_orig
gen log_close = log_close_price_cens
gen linfer_price = loginfer_hp_tier_cens
gen linfer_diff = log_infer_hp_orig_tier_diff_cens

label var sale365 "Listing sold w/in 365 days"
label var days_to_sale "Days to Sale"
label var dom_ggp "Days on Market"
label var log_list "Log(List Price)"
label var log_list "Log(List Price)"
label var log_close "Log(Sale Price)"
label var discount "Log(Sale Price/List Price)"
label var discount2 "Log(Sale Price/List Price)"
label var linfer_price "Log(Inferred Price)"
label var linfer_diff "Log(Inferred Price/List Price)"


sort prop_id_dw list_orig_dt_cl close_dt_cl
by prop_id_dw list_orig_dt_cl: keep if _n == _N

merge 1:1 list_id mls_name_abbr using "$path/data/buyer_commission_data.dta", keep(1 3) gen(merge_commission)
merge 1:m  prop_id_dw list_orig_dt_cl using "$path/data/data_with_future_foreclosures.dta", keepusing(will_foreclose next_foreclosed_dt forced_sale) keep(1 3)
drop _merge
gen years_to_forecl = (next_foreclosed_dt -list_orig_dt_cl)/365
gen will_fc = will_foreclose
replace will_fc = 0 if years_to_forecl > 2
replace forced_sale = 0 if forced_sale == .
replace will_fc = . if forced_sale == 1
label var will_fc "Foreclosure in 2 years"




/** END DATA PREP **/
/** Local Variables ***/
local outcome sale365
global controls i.view_flag i.cooling_flag i.garage_flag i.garage_num_cens i.new_flag log_living_area_cl_cens i.basement_flag i.totalbaths_num_cl_cens i.totalbeds_num_cens i.waterfront_flag i.fireplace_flag
global controls2 view_flag cooling_flag garage_flag garage_num_cens new_flag log_living_area_cl_cens basement_flag totalbaths_num_cl_cens totalbeds_num_cens waterfront_flag fireplace_flag

mark control_sample
markout control_sample $controls2

tab repeat if lagged_bagent_id != "" & control_sample & iv_sample == 1

local direct_effects z_exp  i.lagged_bagent_active
local iv Z
local endog endog_exp
local direct_effects_period c.z_exp#i.period  i.lagged_bagent_active  i.lagged_bagent_active_bust i.lagged_bagent_active_medium
local direct_effects_period_all c.z_exp_all#i.period  i.lagged_bagent_active  i.lagged_bagent_active_bust i.lagged_bagent_active_medium
local direct_effects c.z_exp  i.lagged_bagent_active
local direct_effects_all c.z_exp_all  i.lagged_bagent_active 
local iv_period Z Z_bust Z_medium
local iv_period_all Z_all Z_bust_all Z_medium_all
local endog_period endog_exp endog_exp_bust endog_exp_medium
local endog_period_all endog_exp_all endog_exp_bust_all endog_exp_medium_all

forvalues t = 2002/2013 {
	gen Z_`t' = Z * (list_orig_year == `t')
	gen endog_`t' = endog_exp * (list_orig_year == `t')
	}

label var logexp "Log(Exp + 1)"
label var endog_exp "Log(Exp + 1)"
label var endog_exp_bust "Bust $\times$ Log(Exp + 1)"
label var endog_exp_medium "Medium $\times$ Log(Exp + 1)"

/*** IV FIGURE (first stage) ***/
/** Fig 1A First Stage **/
ivreghdfe sale365  (`endog_period' = `iv_period' ) if iv_sample == 1 & control_sample == 1, absorb(`direct_effects_period'  zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id)
gen ivreg_samp = e(sample)

sum repeat if ivreg_samp
sum repeat if ivreg_samp & lagged_bagent_active == 1

ivreghdfe sale365  (`endog_period_all' = `iv_period_all' ) if iv_sample == 1 & control_sample == 1, absorb(`direct_effects_period'  zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id)
gen ivreg_samp_all = e(sample)
sum repeat if ivreg_samp_all


/** Complier Rewight ***/
/** For complier stuff **/

sum endog_exp if   ivreg_samp ==1 & control_sample ==1, d
gen high_exp = endog_exp > r(p50)

fastxtile time_since_bin = time_diff_yr if  ivreg_samp == 1 & control_sample == 1 , n(8)
reghdfe sale365 $controls if ivreg_samp == 1 & control_sample == 1 , absorb(zip_dw##i.purchase_year##i.list_orig_year )  residuals(Z_res)
predict sale_hat, xbd
fastxtile sale_hat_bin = sale_hat, n(4)
drop Z_res
reghdfe Z c.z_exp $controls if ivreg_samp == 1 & control_sample == 1 , absorb(lagged_bagent_active zip_dw##i.purchase_year##i.list_orig_year )  residuals(Z_res)
sum Z_res if ivreg_samp == 1 & control_sample == 1 
local max = r(max)
local min = r(min)
reg  high_exp Z_res if ivreg_samp == 1 & control_sample == 1 
local complier_all = abs(_b[Z] * (`max' - `min'))
gen complier_weight = .
forvalues j = 1/3 {
	forvalues i = 1/4 {
		reghdfe Z c.z_exp $controls if ivreg_samp == 1 & control_sample == 1 & sale_hat_bin == `i' & period == `j', absorb(lagged_bagent_active zip_dw##i.purchase_year##i.list_orig_year )  residuals(Z_res_`i'_`j')
		sum Z_res_`i'_`j' if ivreg_samp == 1 & control_sample == 1 & sale_hat_bin == `i' & period == `j',
		local max = r(max)
		local min = r(min)
		reg  high_exp Z_res_`i'_`j'    if ivreg_samp == 1 & control_sample == 1 & sale_hat_bin == `i'  & period == `j'
		local complier_`i'_`j' = abs( _b[Z]*(`max' - `min'))
		local cshare =    (`complier_`i'_`j'' / `complier_all')
		replace complier_weight = `cshare' if ivreg_samp == 1 & control_sample == 1 & sale_hat_bin == `i' & period == `j'
		}
	}




/*** END PREP ***/

/*** MODEL ESTIMATION CODE ****/
* Constructing values for Matlab calibration for Sale Prob and Price
* Sale Prob
*do calibration_static_continuous_pgp.do


/* Calculate 10th and 90th percentile **/
preserve
keep if  control_sample ==1 
keep agent_id mls_id endog_exp list_orig_year
duplicates drop
sum endog_exp, d
disp "10th percentile:" `=exp(r(p10))-1'
disp "90th percentile:" `=exp(r(p90))-1'
restore

ivreghdfe `outcome'  (`endog_period'  = `iv_period' ) $controls if ivreg_samp == 1 & control_sample == 1 , absorb(`direct_effects_period'  zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id)

eststo iv2_sale365_w_firststage:  ivreghdfe `outcome'  (`endog_period'  = `iv_period' ) $controls if ivreg_samp == 1 & control_sample == 1 , absorb(`direct_effects_period'  zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id) savefirst savefprefix(st1)

make_graph "first_stage" endog_exp
graph save  "$path/output/figures/first_stage_figure.gph", replace
graph export "$path/output/figures/first_stage_figure.pdf", replace


reghdfe endog_exp $controls, absorb(zip_dw##i.purchase_year##i.list_orig_year  )  residuals
predict endog_exp_hat, xbd
make_graph_nocontrols "exclusion" endog_exp_hat
graph save  "$path/output/figures/exclusion_figure.gph", replace
graph export "$path/output/figures/exclusion_figure.pdf", replace

make_graph "reduced_form" sale365
graph save  "$path/output/figures/reduced_form_figure.gph", replace
graph export "$path/output/figures/reduced_form_figure.pdf", replace

preserve
keep if boom == 1
make_graph "first_stage_boom" endog_exp
graph save  "$path/output/figures/first_stage_figure_boom.gph", replace
graph export "$path/output/figures/first_stage_figure_boom.pdf", replace
make_graph "reduced_form_boom" sale365
graph save  "$path/output/figures/reduced_form_figure_boom.gph", replace
graph export "$path/output/figures/reduced_form_figure_boom.pdf", replace
restore

preserve
keep if bust == 1
make_graph "first_stage_bust" endog_exp
graph save  "$path/output/figures/first_stage_figure_bust.gph", replace
graph export "$path/output/figures/first_stage_figure_bust.pdf", replace
make_graph "reduced_form_bust" sale365
graph save  "$path/output/figures/reduced_form_figure_bust.gph", replace
graph export "$path/output/figures/reduced_form_figure_bust.pdf", replace
restore


/*** Summary Statistics Table ***/
local outcome_list sale30 sale90 sale180 sale365 will_fc days_to_sale dom_ggp log_list log_close discount  linfer_diff loginfer_hp_sale_diff  discount2  
local controls $controls2

label var sale30 "Listing sold w/in 30 days"
label var sale90 "Listing sold w/in 90 days"
label var sale180 "Listing sold w/in 180 days"
label var sale365 "Listing sold w/in 365 days"
label var endog_exp  "Log(Agent Experience + 1)"
label var will_fc "Foreclosure in 2 years"
label var dom_ggp "Days on Market"
label var days_to_sale "Days to Sale"
label var log_list "Log(List Price)"
label var log_close "Log(Sale Price)"
label var discount2 "Log(Sale Price/List Price)"
label var linfer_diff "Log(List Price/Inferred Price)"
label var loginfer_hp_sale_diff "Log(Sale Price/Inferred Price)"

estpost tabstat  endog_exp `outcome_list'  if control_sample, c(stat) stat(mean p50 sd)
esttab, ///
   cells("mean(fmt(%6.2fc)) p50 sd(fmt(%6.2fc))") nonumber ///
   nomtitle nonote noobs label collabels( "Mean" "Median" "SD")
esttab using "$path/output/summary_table_all.tex", replace ///
   cells("mean(fmt(%6.2fc)) p50 sd(fmt(%6.2fc))") nonumber ///
   nomtitle nonote noobs label collabels( "Mean" "Median" "SD")


estpost tabstat  endog_exp `outcome_list'  if control_sample & iv_sample,  c(stat) stat(mean p50 sd)
esttab, ///
   cells("mean(fmt(%6.2fc)) p50 sd(fmt(%6.2fc))") nonumber ///
   nomtitle nonote noobs label collabels( "Mean" "Median" "SD")

esttab using "$path/output/summary_table_iv.tex", replace ///
   cells("mean(fmt(%6.2fc)) p50 sd(fmt(%6.2fc))") nonumber ///
   nomtitle nonote noobs label collabels( "Mean" "Median" "SD")



/*** Regression Table ***/
label var endog_exp "Log(Exp + 1)"


foreach outcome of varlist buy_commission_cens sale365 will_fc days_to_sale  dom_ggp log_list log_close discount linfer_price linfer_diff discount2  loginfer_hp_sale_diff sale30 sale90 sale180  buy_commission_cens {
	eststo ols1_`outcome': reghdfe `outcome' endog_exp if control_sample, absorb(zip_mo) cluster(mls_id)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="No"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="No"				
   qui estadd local zip_purch_list_fe="No"

	eststo ols2_`outcome': reghdfe `outcome'  `endog_period' if control_sample, cluster(mls_id) absorb(zip_mo)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="No"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="No"				
   qui estadd local zip_purch_list_fe="No"

	eststo ols3_`outcome': reghdfe `outcome'  `endog_period' $controls  , cluster(mls_id) absorb(zip_mo)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="No"				
   qui estadd local zip_purch_list_fe="No"

	eststo sub1_`outcome': reghdfe `outcome'  `endog_period' $controls if ~missing(equity_stakes_tier) , cluster(mls_id) absorb(zip_mo)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="No"				
   qui estadd local zip_purch_list_fe="No"

	eststo sub2_`outcome': reghdfe `outcome'  `endog_period' $controls loginfer_hp_tier , cluster(mls_id) absorb(zip_mo) 
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="Yes"				
   qui estadd local zip_purch_list_fe="No"

	eststo sub3_`outcome': reghdfe `outcome'  `endog_period' $controls equity_stakes_tier , cluster(mls_id) absorb(zip_mo) 
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="Yes"
	qui estadd local infer_hp="No"					
   qui estadd local zip_purch_list_fe="No"

	eststo iv1_`outcome': ivreghdfe `outcome'  (endog_exp = Z ) `direct_effects' $controls if ivreg_samp == 1 & control_sample == 1, absorb(  zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id) partial($controls) endog(endog_exp)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
   qui estadd local infer_hp="No"
   qui estadd local zip_purch_list_fe="Yes"
   qui estadd local direct_fe="Yes"

	eststo iv2_`outcome': ivreghdfe `outcome'  (`endog_period'  = `iv_period' ) $controls if ivreg_samp == 1 & control_sample == 1 , absorb(`direct_effects_period'  zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id) partial($controls) endog(`endog_period')
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
   qui estadd local infer_hp="No"
   qui estadd local zip_purch_list_fe="Yes"
   qui estadd local direct_fe="Yes"
	
	eststo iv3_`outcome': reghdfe `outcome'  `endog_period' $controls if ivreg_samp == 1 & control_sample == 1  , absorb(zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id) 
   qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="Yes"				
   qui estadd local zip_purch_list_fe="No"
   qui estadd local direct_fe="No"

	eststo iv4_`outcome': reghdfe `outcome'  `endog_period' $controls if ivreg_samp == 1 & control_sample == 1 [aw=complier_weight]  , absorb(zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id)
   qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="Yes"				
   qui estadd local zip_purch_list_fe="No"
   qui estadd local direct_fe="No"


	estwrite *_`outcome' using $path/output/tables/regressions, append		
	}


clonevar linfer_sale_diff = loginfer_hp_sale_diff

foreach outcome of varlist sale365 will_fc days_to_sale  dom_ggp log_list log_close discount linfer_price linfer_diff discount2  linfer_sale_diff  buy_commission_cens {
	eststo ols1_`outcome'_indiv: reghdfe `outcome' endog_exp_all if control_sample, absorb(zip_mo) cluster(mls_id)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="No"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="No"				
   qui estadd local zip_purch_list_fe="No"

	eststo ols2_`outcome'_indiv: reghdfe `outcome'  `endog_period_all' if control_sample, cluster(mls_id) absorb(zip_mo)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="No"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="No"				
   qui estadd local zip_purch_list_fe="No"

	eststo ols3_`outcome'_indiv: reghdfe `outcome'  `endog_period_all' $controls   , cluster(mls_id) absorb(zip_mo)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="No"				
   qui estadd local zip_purch_list_fe="No"

	eststo sub1_`outcome'_indiv: reghdfe `outcome'  `endog_period_all' $controls if ~missing(equity_stakes_tier) , cluster(mls_id) absorb(zip_mo)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="No"				
   qui estadd local zip_purch_list_fe="No"

	eststo sub2_`outcome'_indiv: reghdfe `outcome'  `endog_period_all' $controls loginfer_hp_tier  , cluster(mls_id) absorb(zip_mo) 
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="Yes"				
   qui estadd local zip_purch_list_fe="No"

	eststo sub3_`outcome'_indiv: reghdfe `outcome'  `endog_period_all' $controls equity_stakes_tie , cluster(mls_id) absorb(zip_mo) 
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="Yes"
	qui estadd local infer_hp="No"					
   qui estadd local zip_purch_list_fe="No"

	eststo iv1_`outcome'_indiv: ivreghdfe `outcome'  (endog_exp_all = Z_all ) `direct_effects_all' $controls if ivreg_samp_all == 1 & control_sample == 1, absorb(  zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id) partial($controls) endog(endog_exp_all)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
   qui estadd local infer_hp="No"
   qui estadd local zip_purch_list_fe="Yes"
   qui estadd local direct_fe="Yes"

	eststo iv2_`outcome'_indiv: ivreghdfe `outcome'  (`endog_period_all'  = `iv_period_all' ) $controls if ivreg_samp_all == 1 & control_sample == 1 , absorb(`direct_effects_period_all'  zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id) partial($controls) endog(`endog_period_all')
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
   qui estadd local infer_hp="No"
   qui estadd local zip_purch_list_fe="Yes"
   qui estadd local direct_fe="Yes"
	
	eststo iv3_`outcome'_indiv: reghdfe `outcome'  `endog_period_all' $controls if ivreg_samp_all == 1 & control_sample == 1  , absorb(zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id) 
   qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="Yes"				
   qui estadd local zip_purch_list_fe="No"
   qui estadd local direct_fe="No"

	eststo iv4_`outcome'_indiv: reghdfe `outcome'  `endog_period_all' $controls if ivreg_samp_all == 1 & control_sample == 1 [aw=complier_weight]  , absorb(zip_dw##i.purchase_year##i.list_orig_year  ) cluster(mls_id)
   qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="Yes"				
   qui estadd local zip_purch_list_fe="No"
   qui estadd local direct_fe="No"


	estwrite *_`outcome'_indiv using $path/output/tables/regressions, append		
	}

/*** Agent Fixed Effects Table ***/
estimates clear
foreach outcome of varlist  will_fc sale365  linfer_diff  loginfer_hp_sale_diff discount2 buy_commission_cens {
qui eststo agent_`outcome': reghdfe  `outcome'  `endog_period' $controls, cluster(zip_dw) absorb(zip_mo agent_id)
qui estadd local zip_time_fe="Yes"
qui estadd local house_char_fe="Yes"
qui estadd local agent_fe="Yes"	
estwrite agent_`outcome' using $path/output/tables/regressions, append		
}


do $path/code/analysis/construct_tables.do

/*** Figure 1 **/
reghdfe sale365 endog_exp if control_sample, absorb(zip_mo) cluster(mls_id)
local b = _b[endog_exp]
local b = string(`b', "%08.3fc")
local se = _se[endog_exp]
local se = string(`se', "%08.3fc")

binscatter2 sale365 endog_exp if control_sample,  absorb(zip_mo)  altcontrols lcolor(black )  ytitle("") xtitle("Log(Agent Experience+1)") text(0.6 0.5 "{&beta} = `b' " "(`se')", place(ne) justification(right)) 
graph export "$path/output/figures/sale_prob_experience.pdf",replace

/*** Effects by year on sale365 **/
fastxtile exp_bin = endog_exp if control_sample ==1, n(3)
forvalues i = 2002/2013 {
	gen _logexp_`i' = endog_exp * (list_orig_year == `i')
}

reghdfe sale365   _logexp_*  if list_orig_year > 2001 & control_sample == 1, cluster(mls_id) absorb(zip_mo, savefe)
estimates save $path/output/sale365_yearly, replace

estimates use $path/output/sale365_yearly
preserve
sum endog_exp if exp_bin == 3
local counterfactual_val = r(mean)
replace endog_exp = `counterfactual_val' if exp_bin == 1 | exp_bin == 2
forvalues i = 2002/2013 {
	replace _logexp_`i' = endog_exp * (list_orig_year == `i')
	}
estimates use $path/output/sale365_yearly
predict sale365_cf, xb
replace sale365_cf = sale365_cf + __hdfe1__ if sale365_cf != .

fcollapse sale365 sale365_cf if sale365_cf != ., by(list_orig_year)
keep if inrange(list_orig_year, 2002,2013)
twoway line sale365_cf sale365  list_orig_year, ///
  lpattern(solid dash)  xlabel(2002(2)2013) ///
  legend(label(2 "Data") label(1 "Counterfactual: high experience")) ///
  ylabel(0.45(0.1)0.8, format(%9.2f)) xtitle("List Year")
graph export "$path/output/figures/sale2_counterfact.pdf", replace
save "$path/output/tables/sale365_counterfactual.dta", replace
restore

preserve
mat b = e(b)
mat V = vecdiag(e(V))
clear
mat b = b'
mat V = V'
svmat b
svmat V
keep if _n < 13
replace V1 = sqrt(V)
gen ub = b1 + 1.96*V1
gen lb = b1 - 1.96*V1
gen year = 2001+_n
twoway (rcap ub lb year) (scatter b year), yline(0) xlabel(2002(4)2013) xtitle("Listing Year") legend(off) ylabel(,format(%9.2f))
graph export "$path/output/figures/sale_prob_experience_yearly.pdf",replace
restore

reghdfe sale365   _logexp_*  $controls if control_sample== 1, cluster(mls_id) absorb(zip_mo, savefe)
estimates save $path/output/sale365_yearly, replace

estimates use $path/output/sale365_yearly
predict sale365_actual if  control_sample== 1, xb
preserve
sum logexp_temp if exp_bin == 3
local counterfactual_val = r(mean)
replace endog_exp = `counterfactual_val' if exp_bin == 1 | exp_bin == 2
forvalues i = 2002/2013 {
	replace _logexp_`i' = endog_exp * (list_orig_year == `i')
	}
estimates use $path/output/sale365_yearly
predict sale365_cf, xb
replace sale365_cf = sale365_cf + __hdfe1__ if sale365_cf != .

fcollapse sale365  sale365_cf if sale365_cf != ., by(list_orig_year)
keep if inrange(list_orig_year, 2002,2013)
twoway line sale365_cf sale365  list_orig_year, ///
  lpattern(solid dash)  xlabel(2002(2)2013) ///
  legend(label(2 "Data") label(1 "Counterfactual: high experience")) ///
  ylabel(0.45(0.1)0.8, format(%9.2f)) xtitle("List Year")
graph export "$path/output/figures/sale2_counterfact_w_controls.pdf", replace
save "$path/output/tables/sale365_counterfactual_w_controls.dta", replace
restore


preserve
mat b = e(b)
mat V = vecdiag(e(V))
clear
mat b = b'
mat V = V'
svmat b
svmat V
keep if _n < 13
replace V1 = sqrt(V)
gen ub = b1 + 1.96*V1
gen lb = b1 - 1.96*V1
gen year = 2001+_n
twoway (rcap ub lb year) (scatter b year), yline(0) xlabel(2002(4)2013) xtitle("Listing Year") legend(off) ylabel(,format(%9.2f))
graph export "$path/output/figures/sale_prob_experience_yearly_w_controls.pdf",replace
restore

preserve
collapse sale365, by(list_orig_year)
tempfile sale_data
save `sale_data'
restore

/*** Foreclosure Graphs and year-by-year effects ***/
reghdfe will_fc endog_exp if forced_sale == 0 & control_sample == 1, absorb(zip_mo) cluster(mls_id)
local b = _b[endog_exp]
local b = string(`b', "%8.4fc")
local se = _se[endog_exp]
local se = string(`se', "%8.4fc")

binscatter2  will_fc endog_exp if forced_sale == 0 & control_sample == 1, absorb(zip_mo)  lcolor(black ) altcontrols mcolor(navy) ///
  ytitle("") xtitle("Log(Agent Experience+1)") text(0.012 4 "{&beta} = `b' " "(`se')", place(ne) justification(right)) 
graph export "$path/output/figures/foreclosure_prob_experience.pdf",replace

preserve
collapse will_fc if forced_sale == 0 & control_sample == 1, by(list_orig_year sale2)
reshape wide will_fc, i(list_orig_year) j(sale2)
twoway line will_fc0 will_fc1 list_orig_year, ///
  lwidth(medthick medthick) lpattern(dash solid) xtitle("Year") legend(label(1 "Unsold Listing") label(2 "Sold Listing")) ///
  xlabel(2001(3)2013) ylabel(,format(%9.2f))
graph export "$path/output/figures/foreclosure_prob_by_sale.pdf",replace
restore
reghdfe will_fc   _logexp_* $controls if forced_sale == 0 & control_sample == 1, cluster(mls_id) absorb(zip_mo, savefe) resid
estimates save $path/output/will_fc_yearly, replace

estimates use $path/output/will_fc_yearly
preserve
sum endog_exp if exp_bin == 3 & forced_sale == 0 & control_sample == 1
local counterfactual_val = r(mean)
replace endog_exp = `counterfactual_val' if exp_bin == 1 | exp_bin == 2
forvalues i = 2002/2013 {
	replace _logexp_`i' = endog_exp * (list_orig_year == `i')
	}
estimates use $path/output/will_fc_yearly
predict will_fc_cf, xb
replace will_fc_cf = will_fc_cf + __hdfe1__ if will_fc_cf != . & forced_sale == 0 & control_sample == 1

fcollapse will_fc  will_fc_cf if will_fc_cf != ., by(list_orig_year)
keep if inrange(list_orig_year, 2001,2013)
replace will_fc_cf = . if list_orig_year > 2012
replace will_fc = . if list_orig_year > 2012
twoway line will_fc_cf will_fc  list_orig_year, ///
  lpattern(solid dash)  xlabel(2002(2)2013) ///
  legend(label(2 "Data") label(1 "Counterfactual: high experience")) ///
  ylabel(0.0(0.005)0.025, format(%9.3f)) xtitle("List Year")
graph export "$path/output/figures/will_fc_counterfact_w_controls.pdf", replace
save "$path/output/tables/will_fc_counterfactual_w_controls.dta", replace
restore

/* reghdfe will_fc   _logexp_* if list_orig_year > 2001 & forced_sale == 0 & control_sample == 1, cluster(mls_id) absorb(zip_mo, savefe) resid */
/* estimates save $path/output/will_fc_yearly, replace */

/* estimates use $path/output/will_fc_yearly */
/* preserve */
/* sum endog_exp if exp_bin == 3 */
/* local counterfactual_val = r(mean) */
/* replace endog_exp = `counterfactual_val' if exp_bin == 1 | exp_bin == 2 */
/* forvalues i = 2002/2013 { */
/* 	replace _logexp_`i' = endog_exp * (list_orig_year == `i') */
/* 	} */
/* estimates use $path/output/will_fc_yearly */
/* predict will_fc_cf if list_orig_year > 2001 & forced_sale == 0 & control_sample == 1, xb */
/* replace will_fc_cf = will_fc_cf + __hdfe1__ if will_fc_cf != . */

/* fcollapse will_fc will_fc_cf if will_fc_cf != ., by(list_orig_year) */
/* keep if inrange(list_orig_year, 2001,2013) */
/* replace will_fc_cf = . if list_orig_year > 2012 */
/* replace will_fc = . if list_orig_year > 2012 */
/* twoway line will_fc_cf will_fc  list_orig_year, /// */
/*   lpattern(solid dash)  xlabel(2002(2)2013) /// */
/*   legend(label(2 "Data") label(1 "Counterfactual: high experience")) /// */
/*   ylabel(0.0(0.005)0.025, format(%9.3f)) xtitle("List Year") */
/* graph export "$path/output/figures/will_fc_counterfact.pdf", replace */
/* save "$path/output/tables/will_fc_counterfactual.dta", replace */
/* restore */


/* preserve */
/* use "$path/output/tables/will_fc_counterfactual_w_controls", clear */
/* merge 1:1 list_orig_year using "$path/output/tables/sale365_counterfactual_w_controls.dta" */

/* replace will_fc_cf = . if list_orig_year > 2012 */
/* replace will_fc = . if list_orig_year > 2012 */
/* gen perc_ch_sale = 100*(sale365_cf - sale365)/ sale365 */
/* gen perc_ch_fc = 100*(will_fc_cf - will_fc)/ will_fc_cf */

/* capture file close fh */
/* file open fh using "$path/output/tables/counterfactual_table.tex", replace write */
/* forvalues i = 1/12 { */
/* local list_orig_year = list_orig_year[`i'] */
/* local will_fc_actual = string(will_fc[`i'], "%9.3f") */
/* local will_fc_cf = string(will_fc_cf[`i'], "%9.3f") */
/* local will_fc_diff = string(perc_ch_fc[`i'], "%9.1f") */
/* local sale365_actual = string(sale365[`i'], "%9.2f") */
/* local sale365_cf = string(sale365_cf[`i'], "%9.2f") */
/* local sale365_diff = string(perc_ch_sale[`i'], "%9.1f") */
/* local line  "`list_orig_year' & `sale365_actual' & `sale365_cf' & `sale365_diff' & & `will_fc_actual' & `will_fc_cf' & `will_fc_diff' \\ " */
/* disp "`line'" */
/* file write fh "`line'" _n */
/* } */
/* file close fh */
/* restore */

/*** Sale Probability against Log(Infer Price / List Price) **/
fastxtile exp_bin2 = endog_exp if control_sample ==1, n(10)
preserve
keep if log_infer_hp_orig_tier_diff_cens != . & control_sample
gen holding_years = t_2 - t_0

expand 2, gen(expand_id)
replace exp_bin = 0 if expand_id == 1
egen num_obs = count(log_infer_hp_orig_tier_diff_cens), by(zip_mo)
egen num_obs2 = count(log_infer_hp_orig_tier_diff_cens), by(zip_mo exp_bin)

fastxtile infer_bin = log_infer_hp_orig_tier_diff_cens if  inrange(infer_hp_orig_tier_diff, 0.5, 1.5) & num_obs > 2, n(20)

/*** OLS Stuff ***/
reghdfe sale365 i.infer_bin#i.exp_bin $controls if bust == 1 , absorb(zip_mo)
estimates store sale365_infer_bust
reghdfe sale365 i.infer_bin#i.exp_bin $controls if boom == 1 , absorb(zip_mo)
estimates store sale365_infer_boom
reghdfe sale365 i.infer_bin#i.exp_bin $controls , absorb(zip_mo)
estimates store sale365_infer
reghdfe loginfer_hp_sale_diff i.infer_bin#i.exp_bin $controls , absorb(zip_mo)
estimates store saleprice_infer
reghdfe sale90 i.infer_bin#i.exp_bin $controls , absorb(zip_mo)
estimates store sale90_infer
reghdfe sale365 i.infer_bin#i.exp_bin $controls if state == "CA" , absorb(zip_mo)
estimates store sale90_infer_CA
sum sale365 if infer_bin != . & control_sample == 1 & infer_bin == 1 & exp_bin == 0
local sale365_mean = r(mean)
sum loginfer_hp_sale_diff if infer_bin != . & control_sample == 1 & infer_bin == 1 & exp_bin == 0
local saleprice_mean = r(mean)
sum sale365 if infer_bin != . & control_sample == 1 & bust == 1 & infer_bin == 1 & exp_bin == 0
local sale365_mean_bust = r(mean)
sum sale365 if infer_bin != . & control_sample == 1 & boom == 1 & infer_bin == 1 & exp_bin == 0
local sale365_mean_boom = r(mean)
sum sale90 if infer_bin != . & control_sample == 1 & infer_bin == 1 & exp_bin == 0
local sale90_mean = r(mean)
sum sale90 if infer_bin != . & state == "CA" & control_sample == 1 & infer_bin == 1 & exp_bin == 0
local sale90_mean_CA = r(mean)

fcollapse  log_infer_hp_orig_tier_diff_cens, by(infer_bin)
drop if infer_bin == .
tempfile infer
save `infer'

estimates restore sale365_infer
regsave
keep if regexm(var, "exp_bin")
split var, parse("#") gen(var_)
split var_1, parse(".") gen(a_)
rename a_1 infer_bin
replace infer_bin = "1" if infer_bin == "1b"
destring infer_bin, replace 
drop a_2 var_1
split var_2, parse(".") gen(a_)
rename a_1 exp_bin
replace exp_bin = "0" if exp_bin == "0b"
destring exp_bin, replace 
drop a_2 var_2
merge m:1 infer_bin using `infer', nogen keep(3)
rename log_infer_hp_orig_tier_diff_cens log_infer
gen coef_norm = coef + `sale365_mean'
twoway (scatter coef_norm log_infer if exp_bin == 0,  msymbol(T)) ///
  (scatter coef_norm log_infer if exp_bin == 1, msymbol(Oh)) ///
  (scatter coef_norm log_infer if exp_bin == 2, msymbol(Th)) ///
  (scatter coef_norm log_infer if exp_bin == 3, msymbol(Sh)), ///
  xtitle("  Log(List Price / Inferred Price)")  legend(label(1 "Overall") label(2 "Bottom Exp. Tercile") label(3 "Middle Exp. Tercile") label(4 "Top Exp. Tercile")) ytitle("")
graph export "$path/output/figures/sale_prob_vs_infer_by_experience.pdf",replace

estimates restore saleprice_infer
regsave
keep if regexm(var, "exp_bin")
split var, parse("#") gen(var_)
split var_1, parse(".") gen(a_)
rename a_1 infer_bin
replace infer_bin = "1" if infer_bin == "1b"
destring infer_bin, replace 
drop a_2 var_1
split var_2, parse(".") gen(a_)
rename a_1 exp_bin
replace exp_bin = "0" if exp_bin == "0b"
destring exp_bin, replace 
drop a_2 var_2
merge m:1 infer_bin using `infer', nogen keep(3)
rename log_infer_hp_orig_tier_diff_cens log_infer
gen coef_norm = coef  + `saleprice_mean'
twoway (scatter coef_norm log_infer if exp_bin == 0,  msymbol(T)) ///
  (scatter coef_norm log_infer if exp_bin == 1, msymbol(Oh)) ///
  (scatter coef_norm log_infer if exp_bin == 2, msymbol(Th)) ///
  (scatter coef_norm log_infer if exp_bin == 3, msymbol(Sh)), ///
  xtitle("  Log(List Price / Inferred Price)")  legend(label(1 "Overall") label(2 "Bottom Exp. Tercile") label(3 "Middle Exp. Tercile") label(4 "Top Exp. Tercile")) ytitle("")
graph export "$path/output/figures/sale_price_vs_infer_by_experience.pdf",replace


estimates restore sale90_infer
regsave
keep if regexm(var, "exp_bin")
split var, parse("#") gen(var_)
split var_1, parse(".") gen(a_)
rename a_1 infer_bin
replace infer_bin = "1" if infer_bin == "1b"
destring infer_bin, replace 
drop a_2 var_1
split var_2, parse(".") gen(a_)
rename a_1 exp_bin
replace exp_bin = "0" if exp_bin == "0b"
destring exp_bin, replace 
drop a_2 var_2
merge m:1 infer_bin using `infer', nogen keep(3)
rename log_infer_hp_orig_tier_diff_cens log_infer
gen coef_norm = coef + `sale90_mean'
twoway (scatter coef_norm log_infer if exp_bin == 0,  msymbol(T)) ///
  (scatter coef_norm log_infer if exp_bin == 1, msymbol(Oh)) ///
  (scatter coef_norm log_infer if exp_bin == 2, msymbol(Th)) ///
  (scatter coef_norm log_infer if exp_bin == 3, msymbol(Sh)), ///
  xtitle("  Log(List Price / Inferred Price)")  legend(label(1 "Overall") label(2 "Bottom Exp. Tercile") label(3 "Middle Exp. Tercile") label(4 "Top Exp. Tercile")) ytitle("")
graph export "$path/output/figures/sale_prob_vs_infer_by_experience_guren1.pdf",replace

estimates restore sale90_infer_CA
regsave
keep if regexm(var, "exp_bin")
split var, parse("#") gen(var_)
split var_1, parse(".") gen(a_)
rename a_1 infer_bin
replace infer_bin = "1" if infer_bin == "1b"
destring infer_bin, replace 
drop a_2 var_1
split var_2, parse(".") gen(a_)
rename a_1 exp_bin
replace exp_bin = "0" if exp_bin == "0b"
destring exp_bin, replace 
drop a_2 var_2
merge m:1 infer_bin using `infer', nogen keep(3)
rename log_infer_hp_orig_tier_diff_cens log_infer
gen coef_norm = coef + `sale90_mean_CA'
twoway (scatter coef_norm log_infer if exp_bin == 0,  msymbol(T)) ///
  (scatter coef_norm log_infer if exp_bin == 1, msymbol(Oh)) ///
  (scatter coef_norm log_infer if exp_bin == 2, msymbol(Th)) ///
  (scatter coef_norm log_infer if exp_bin == 3, msymbol(Sh)), ///
  xtitle("  Log(List Price / Inferred Price)")  legend(label(1 "Overall") label(2 "Bottom Exp. Tercile") label(3 "Middle Exp. Tercile") label(4 "Top Exp. Tercile")) ytitle("")
graph export "$path/output/figures/sale_prob_vs_infer_by_experience_guren2.pdf",replace


estimates restore sale365_infer_bust
regsave
keep if regexm(var, "exp_bin")
split var, parse("#") gen(var_)
split var_1, parse(".") gen(a_)
rename a_1 infer_bin
replace infer_bin = "1" if infer_bin == "1b"
destring infer_bin, replace 
drop a_2 var_1
split var_2, parse(".") gen(a_)
rename a_1 exp_bin
replace exp_bin = "0" if exp_bin == "0b"
destring exp_bin, replace 
drop a_2 var_2
merge m:1 infer_bin using `infer', nogen keep(3)
rename log_infer_hp_orig_tier_diff_cens log_infer
gen coef_norm = coef + `sale365_mean_bust'
twoway (scatter coef_norm log_infer if exp_bin == 0,  msymbol(T)) ///
  (scatter coef_norm log_infer if exp_bin == 1, msymbol(Oh)) ///
  (scatter coef_norm log_infer if exp_bin == 2, msymbol(Th)) ///
  (scatter coef_norm log_infer if exp_bin == 3, msymbol(Sh)), ///
  xtitle("  Log(List Price / Inferred Price)")  legend(label(1 "Overall") label(2 "Bottom Exp. Tercile") label(3 "Middle Exp. Tercile") label(4 "Top Exp. Tercile")) ytitle("")
graph export "$path/output/figures/sale_prob_vs_infer_by_experience_bust.pdf",replace

estimates restore sale365_infer_boom
regsave
keep if regexm(var, "exp_bin")
split var, parse("#") gen(var_)
split var_1, parse(".") gen(a_)
rename a_1 infer_bin
replace infer_bin = "1" if infer_bin == "1b"
destring infer_bin, replace 
drop a_2 var_1
split var_2, parse(".") gen(a_)
rename a_1 exp_bin
replace exp_bin = "0" if exp_bin == "0b"
destring exp_bin, replace 
drop a_2 var_2
merge m:1 infer_bin using `infer', nogen keep(3)
rename log_infer_hp_orig_tier_diff_cens log_infer
gen coef_norm = coef + `sale365_mean_boom'
twoway (scatter coef_norm log_infer if exp_bin == 0,  msymbol(T)) ///
  (scatter coef_norm log_infer if exp_bin == 1, msymbol(Oh)) ///
  (scatter coef_norm log_infer if exp_bin == 2, msymbol(Th)) ///
  (scatter coef_norm log_infer if exp_bin == 3, msymbol(Sh)), ///
  xtitle("  Log(List Price / Inferred Price)")  legend(label(1 "Overall") label(2 "Bottom Exp. Tercile") label(3 "Middle Exp. Tercile") label(4 "Top Exp. Tercile")) ytitle("")
graph export "$path/output/figures/sale_prob_vs_infer_by_experience_boom.pdf",replace

restore
*/

/*** Entry Exit figures (broker panel) ***/
use $path/data/buyerseller_brokerpanel.dta, clear
*use $path/data/brokerpanel_main_vars.dta, clear
do $path/code/analysis/broker_panel_prelim.do
egen max_experience = max(all_experience_l1), by(agent_id mls_id)
drop if max_experience > 200
drop max_experience

sort agent year
fegen mls_year = group(mls_id year)
replace num_list = 0 if missing(num_list)

drop boom bust recovery period
gen bust = 1 if year==2007 | year==2008 | year==2009 | year==2010 | year==2011
gen medium = 1 if year==2006 | year==2012
gen boom = 1 if year==2002 |year==2003 |year==2004 | year==2005 | year==2013 | year==2014
gen period = 1*(bust==1) + 2 *(medium==1) + 3*(boom==1)
capture label drop periods
label define periods 1 "Bust" 2 "Medium" 3 "Boom"

label values period periods


/*** Summary Stats by Year **/

preserve
collapse  entrant, by(year)
rename year list_orig_year
merge 1:1 list_orig_year using `sale_data'
rename list_orig_year year
keep if year >= 2003 & year < 2014
twoway (line entrant year, lwidth(medthick) ytitle("Share of new agents")) ///
  (line sale365 year, ytitle("Share of listings sold",axis(2)) lwidth(medthick) yaxis(2)), ///
  xlabel(2003(4)2011 2015.1 " ") legend(off) ///
  text(0.215 2011.9 "Share of new agents", color(dblue) placement(east)) ///
  text(0.285  2011.9 "Share of listings sold", color(dred) placement(east)) ///
  xtitle("Year")
graph export "$path/output/figures/share_listings_share_entrants_by_year.pdf",replace
restore

preserve
collapse (sum) num_list active, by(year)
replace num_list = num_list / 1000
replace active = active / 1000
twoway (line num_list year, lwidth(medthick) ytitle("Number of agents (000s)")) ///
  (line active year, ytitle("Number of listings (000s)",axis(2)) lwidth(medthick) yaxis(2)), ///
  xlabel(2003(4)2011 2015.1 " ")  ///
  text(0.215 2011.9 "Share of new agents", color(dblue) placement(east)) ///
  text(0.285  2011.9 "Share of listings sold", color(dred) placement(east)) ///
  xtitle("Year")
restore

/** Entry exit by year**/
preserve
fcollapse (sum) num_list active (mean) exit entrant, by(year) 
twoway line exit entrant year, ///
  lwidth(medthick medthick) lpattern(dash solid) xtitle("Year") legend(label(1 "Exit Rate") label(2 "Entry Rate")) ///
  xlabel(2001(3)2013) ylabel(,format(%9.2f))
graph export "$path/output/figures/entry_exit_by_year.pdf",replace
restore

/** Exit by experience **/
preserve
gen exp_bin_graph = all_experience_l1
replace exp_bin_graph = 50 if all_experience_l1 > 50
drop if all_experience_l1 == .
fcollapse (sum) num_list active (mean) exit entrant, by(exp_bin_graph period)
reshape wide exit num_list active entrant, i(exp_bin_graph) j(period)
twoway scatter exit1 exit2 exit3 exp_bin_graph, ///
  xtitle("Agent Experience") xlabel(0(10)50 50 "50+" ) ytitle("") ///
  ylabel(,format(%9.2f)) legend(label(1 "Bust") label(2 "Medium") label(3 "Boom")) ///
  msymbol(square diamond )
graph export "$path/output/figures/exit_by_experience.pdf",replace
restore


/** Entry Tightness **/
preserve
gen fips_code = lmain_fips
replace fips_code = bmain_fips if missing(lmain_fips) & active==1
destring fips_code, replace
merge m:1 fips_code year using $path/data/fips_codes_data.dta
tab _merge

drop if _merge==2
drop _merge

foreach var in price_change vol_change price_change_no_fc list_vol_change ind_profit_change {
 gen `var'_cens = `var'
 sum `var',d
 replace `var'_cens =. if `var'<r(p1) | `var'>r(p99)
}

gen active_experience = all_experience_l1 if active == 1
replace active_experience = . if active_experience > 200


gen logexp = log(1+ active_experience)
fcollapse (mean) logexp exit* entrant  *price* *vol* *profit* active_experience listings sales (p50) med_exp = active_experience (p25) p25_exp = active_experience (p75) p75_exp = active_experience (iqr) iqr_exp = active_experience (sum)   num_agents=active, by(fips_code year)

gen easy_to_sell = sales/listings
replace entrant = . if year < 2003
replace exit = . if year > 2012
replace active_experience = . if year < 2002
replace med_exp = . if year < 2002
replace iqr_exp = . if year < 2002
replace p25_exp = . if year < 2002
replace p75_exp = . if year < 2002


foreach x of varlist entrant exit active_experience med_exp p25_exp p75_exp {
	binscatter2 `x' easy_to_sell [aw=listings],  absorb(fips_code) xtitle("Sales/Listings") ytitle("") lcolor(color2) mcolor(navy)  ylabel(, format(%9.2f))
	graph export $path/output/figures/`x'_inventory.pdf, replace
	binscatter2 `x' price_change_cens [aw=listings] ,  absorb(fips_code) xtitle("% Change in Price") ytitle("") lcolor(color2) mcolor(navy)  ylabel(,format(%9.2f))
	graph export $path/output/figures/`x'_price_change.pdf, replace 
	binscatter2 `x' vol_change_cens [aw=listings]  if year > 2002,  absorb(fips_code) xtitle("% Change in Volume") ytitle("") lcolor(color2) mcolor(navy)  ylabel(, format(%9.2f))
	graph export $path/output/figures/`x'_vol_change.pdf, replace 
}
egen num_obs = count(year), by(fips_code)
*keep if num_obs == 13
egen denom = total(listing)
estimates clear
foreach y of varlist entrant exit active_experience logexp  p25_exp  med_exp p75_exp  {
	eststo: reghdfe `y' easy_to_sell price_change_cens list_vol_change_cens [aw=listing]   , absorb(fips_code) cluster(fips_code)
	qui estadd local fips_fe="Yes"
	}

esttab using $path/output/tables/entry_exit_reasons.tex, booktabs replace frag b(%9.2f) se(%9.2f)  nobaselevels coeflabels(easy_to_sell "Sales / Listings" price_change_cens "$\Delta$ Sales Price" list_vol_change_cens "$\Delta$ Listing Volume") ///
  drop(_cons) obslast star(* 0.1  ** .05  *** 0.01) nomtitles nonotes r2(%9.4f) 		scalars("fips_fe  FIPS Code F.E.") nomtitles


restore

/** distribution of agents and listings by experience bin **/
preserve
keep if active == 1
gen exp_bin_graph = all_experience_l1
replace exp_bin_graph = 50 if all_experience_l1 > 50
drop if all_experience_l1 == .
tab entrant if exp_bin_graph == 0

fcollapse (sum) num_list active, by(exp_bin_graph)
foreach x of varlist num_list active {
	egen `x'_denom = total(`x')
	gen `x'_share = `x' / `x'_denom
	}

twoway bar num_list_share exp_bin_graph, xtitle("Agent Experience") xlabel(0(10)50 50 "50+") ytitle("") ylabel(0(.1).3,format(%9.2f)) plotregion(margin(b=0 l= 0.5)) barwidth(0.9)
graph export "$path/output/figures/listing_share_experience.pdf", replace
twoway bar active_share exp_bin_graph, xtitle("Agent Experience") xlabel(0(10)50 50 "50+") ytitle("") ylabel(0(.1).3,format(%9.2f)) plotregion(margin(b=0 l= 0.5)) barwidth(0.9)
graph export "$path/output/figures/agent_share_experience.pdf", replace
restore


preserve
keep if active == 1


estimates clear
eststo: reghdfe num_bs all_experience_l1, cluster(mls_id) absorb(lmain_fips list_year)
eststo: reghdfe num_bs c.all_experience_l1##ib3.period, cluster(mls_id) absorb(lmain_fips list_year)  
esttab using $path/output/tables/agent_experience_new_clients.tex, booktabs replace frag b(%9.2f) se(%9.2f)  nobaselevels coeflabels(all_experience_l1 "Agent Experience" 1.period "Bust" 2.period "Medium" 1.period#c.all_experience_l1 "Bust $\times$ Experience" 2.period#c.all_experience_l1 "Medium $\times$ Experience"  ) ///
  drop(_cons) obslast star(* 0.1  ** .05  *** 0.01) nomtitles nonotes r2(%9.4f) 		scalars("fips_fe  FIPS Code F.E.") 


fcollapse (p50) num_bs_50 = num_bs  (p25) num_bs_25 = num_bs (p75) num_bs_75 = num_bs (p90) num_bs_90 = num_bs (p10) num_bs_10 = num_bs, by(all_experience_l1) 
twoway    (rcap num_bs_75 num_bs_25 all_experience_l1 if all_experience_l1 < 101) (scatter num_bs_50 all_experience_l1 if all_experience_l1 < 101), xtitle("Agent Experience") ytitle("") legend(label(1 "25-75th percentile") label (2 "Median"))
graph export "$path/output/figures/agent_experience_new_clients.pdf", replace
restore




/*** Additional robustness tests for appendix **/

/** Death Sample **/
use $path/data/death_records, clear
gen list_orig_month = mofd(list_orig_dt_cl)
gen close_month = mofd(close_dt_cl)
egen zip_mo = group(zip_dw list_orig_month)
gen list_to_death = list_dt_cl - death_date
keep if list_to_death<730 & list_to_death>-30 // list within 2 years of death or divorce
duplicates tag prop_id_dw list_orig_dt_cl , gen(tag)
keep if tag == 0
merge 1:m  prop_id_dw list_orig_dt_cl using "$path/data/data_with_future_foreclosures.dta", keepusing(will_foreclose next_foreclosed_dt forced_sale) keep(1 3)
drop _merge
gen years_to_forecl = (next_foreclosed_dt -list_orig_dt_cl)/365
gen will_fc = will_foreclose
replace will_fc = 0 if years_to_forecl > 2
replace forced_sale = 0 if forced_sale == .
replace will_fc = . if forced_sale == 1
label var will_fc "Foreclosure in 2 years"


foreach x of varlist garage_num totalbaths_num_cl totalbeds_num living_area_cl {
   cap drop `x'_cens
   gen `x'_cens = `x'
   sum `x'_cens, d
   replace `x'_cens = . if `x'< r(p1) | `x' > r(p99)
	}


gen bust = 1 if list_orig_year==2007 | list_orig_year==2008 | list_orig_year==2009 | list_orig_year==2010 | list_orig_year==2011 
gen medium = 1 if list_orig_year==2006 | list_orig_year==2012
gen boom =1 if list_orig_year==2002 |list_orig_year==2003 |list_orig_year==2004 | list_orig_year==2005 | list_orig_year==2013 | list_orig_year==2014
gen period = 1*(bust==1) + 2 *(medium==1) + 3*(boom==1)
label define periods 1 "Bust" 2 "Medium" 3 "Boom"
label values period periods

gen log_living_area_cl_cens = log(living_area_cl_cens)

egen fips_mo = group(fips_code list_orig_month)
egen fips_cl_mo = group(fips_code close_month)
egen agent = group(mls_id lagent_id)
gen log_close_price_cens = log(close_price_cens)
gen log_close_price = log_close_price_cens
gen log_list_price_orig_cens = log(list_price_orig_cens)
gen log_close_price_orig_cens = log(close_price_cens)
gen equity_stakes_tier = (hpi_low_t2/hpi_low_t0)-1 if tier == 1
replace equity_stakes_tier = (hpi_mid_t2/hpi_mid_t0)-1 if tier == 2
replace equity_stakes_tier = (hpi_high_t2/hpi_high_t0)-1 if tier == 3
label var equity_stakes_tier "Appreciation since Purch."

global controls i.view_flag i.cooling_flag i.garage_flag i.garage_num_cens i.new_flag log_living_area_cl_cens ///
     i.basement_flag i.totalbaths_num_cl_cens i.totalbeds_num_cens i.waterfront_flag i.fireplace_flag
local exp all_experience_l1 
gen logexp = log(1+all_experience_l1)

gen sale365 = sale2==1 & days_to_sale<=365
replace sale365=0 if sale2==0 | days_to_sale>365

egen fips_ym = group(fips_code list_orig_month)
binscatter2 sale365 logexp, absorb(fips_ym) controls( $controls)  lcolor(black ) mcolor(navy) ///
  ytitle("") xtitle("Log(Agent Experience+1)") 
graph export "$path/output/figures/sale_prob_experience_death.pdf",replace
label var logexp "Log(Exp + 1)"
gen loginfer_hp_tier_cens = loginfer_hp_tier if infer_hp_orig_tier_diff_cens != .
gen discount = log_list_price_orig - log(close_price)
gen discount_cens = discount
sum discount_cens, d
replace discount_cens = . if discount< r(p1) | discount > r(p99)
gen log_infer_hp_orig_tier_diff_cens = log(infer_hp_orig_tier_diff_cens)
gen log_list = log_list_price_orig
gen log_close = log_close_price_cens
gen linfer_price = loginfer_hp_tier_cens
gen linfer_diff = log_infer_hp_orig_tier_diff_cens
gen loginfer_hp_sale_diff = log_close_price - loginfer_hp_tier_cens
gen discount2 = discount if loginfer_hp_sale_diff != .
destring fips_code, replace


foreach outcome of varlist sale365 will_fc days_to_sale  dom_ggp log_list log_close discount linfer_price linfer_diff discount2  loginfer_hp_sale_diff {
	estimates clear

	qui eststo dth3_`outcome': reghdfe `outcome'  logexp c.logexp#ib3.period $controls, cluster(mls_id) absorb(fips_code#list_orig_month)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="No"				

	estwrite dth*_`outcome' using $path/output/tables/regressions, append			
	}


/*** Chula Vista Sample **/
use $path/data/SD9191_market_full_w_deeds_prepped.dta, clear
gen list_orig_month = mofd(list_orig_dt_cl)
gen close_month = mofd(close_dt_cl)
egen zip_mo = group(zip_dw list_orig_month)
duplicates tag prop_id_dw list_orig_dt_cl , gen(tag)
keep if tag == 0
merge 1:m  prop_id_dw list_orig_dt_cl using "$path/data/data_with_future_foreclosures.dta", keepusing(will_foreclose next_foreclosed_dt forced_sale) keep(1 3)
drop _merge
gen years_to_forecl = (next_foreclosed_dt -list_orig_dt_cl)/365
gen will_fc = will_foreclose
replace will_fc = 0 if years_to_forecl > 2
replace forced_sale = 0 if forced_sale == .
replace will_fc = . if forced_sale == 1
label var will_fc "Foreclosure in 2 years"


foreach x of varlist garage_num totalbaths_num_cl totalbeds_num living_area_cl {
   cap drop `x'_cens
   gen `x'_cens = `x'
   sum `x'_cens, d
   replace `x'_cens = . if `x'< r(p1) | `x' > r(p99)
	}


gen bust = 1 if list_orig_year==2007 | list_orig_year==2008 | list_orig_year==2009 | list_orig_year==2010 | list_orig_year==2011 
gen medium = 1 if list_orig_year==2006 | list_orig_year==2012
gen boom =1 if list_orig_year==2002 |list_orig_year==2003 |list_orig_year==2004 | list_orig_year==2005 | list_orig_year==2013 | list_orig_year==2014
gen period = 1*(bust==1) + 2 *(medium==1) + 3*(boom==1)
label define periods 1 "Bust" 2 "Medium" 3 "Boom"
label values period periods

gen log_living_area_cl_cens = log(living_area_cl_cens)

egen fips_mo = group(fips_code list_orig_month)
egen fips_cl_mo = group(fips_code close_month)
egen agent = group(mls_id lagent_id)
gen log_close_price_cens = log(close_price_cens)
gen log_close_price = log_close_price_cens
gen log_list_price_orig_cens = log(list_price_orig_cens)
gen log_close_price_orig_cens = log(close_price_cens)
gen equity_stakes_tier = (hpi_low_t2/hpi_low_t0)-1 if tier == 1
replace equity_stakes_tier = (hpi_mid_t2/hpi_mid_t0)-1 if tier == 2
replace equity_stakes_tier = (hpi_high_t2/hpi_high_t0)-1 if tier == 3
label var equity_stakes_tier "Appreciation since Purch."

global controls i.view_flag i.cooling_flag i.garage_flag i.garage_num_cens i.new_flag log_living_area_cl_cens ///
     i.basement_flag i.totalbaths_num_cl_cens i.totalbeds_num_cens i.waterfront_flag i.fireplace_flag
local exp all_experience_l1 
gen logexp = log(1+all_experience_l1)

gen sale365 = sale2==1 & days_to_sale<=365
replace sale365=0 if sale2==0 | days_to_sale>365

egen fips_ym = group(fips_code list_orig_month)
binscatter2 sale365 logexp,  absorb(zip_mo) controls( $controls)  lcolor(black ) mcolor(navy) ///
  ytitle("") xtitle("Log(Agent Experience+1)") 
graph export "$path/output/figures/sale_prob_experience_CV.pdf",replace

label var logexp "Log(Exp + 1)"

gen loginfer_hp_tier_cens = loginfer_hp_tier if infer_hp_orig_tier_diff_cens != .
gen discount = log_list_price_orig - log(close_price)
gen discount_cens = discount
sum discount_cens, d
replace discount_cens = . if discount< r(p1) | discount > r(p99)
gen log_infer_hp_orig_tier_diff_cens = log(infer_hp_orig_tier_diff_cens)
gen log_list = log_list_price_orig
gen log_close = log_close_price_cens
gen linfer_price = loginfer_hp_tier_cens
gen linfer_diff = log_infer_hp_orig_tier_diff_cens
gen loginfer_hp_sale_diff = log_close_price - loginfer_hp_tier_cens
gen discount2 = discount if loginfer_hp_sale_diff != .


destring fips_code, replace

foreach outcome of varlist sale365 will_fc days_to_sale  dom_ggp log_list log_close discount linfer_price linfer_diff discount2  loginfer_hp_sale_diff {
	estimates clear

	qui eststo cv3_`outcome': reghdfe `outcome'  logexp c.logexp#ib3.period $controls, cluster(zip_dw) absorb(fips_code#list_orig_month)
	qui estadd local zip_time_fe="Yes"
	qui estadd local house_char_fe="Yes"
	qui estadd local equity_stake="No"
	qui estadd local infer_hp="No"				

	estwrite cv3_`outcome' using $path/output/tables/regressions, append			
	}



do $path/code/analysis/construct_tables.do
